In [14]:
# %load kegg_parserer_ko2_KO_counts.py
# Parses the KEGG htext file format
# from argparse import ArgumentParser
import re
import sys
import os
import csv
import pandas as pd
import numpy as np
In [65]:
# Load the two tab-separated input tables from the working directory.
# Each table is indexed by one of its own columns rather than by row number.
ko2Tab = pd.read_csv('ko00002_table.tab', sep='\t', index_col='KeggID')
All_KO_Counts = pd.read_csv('PhytoAll_KO_Counts.tab', sep='\t', index_col='gID')
In [41]:
Out[41]:
In [66]:
from matplotlib import pyplot as plt
from matplotlib import gridspec
from itertools import combinations
import palettable.colorbrewer as b2m
import palettable as pal
import glob
from Bio import SeqIO
from string import ascii_lowercase, ascii_uppercase
from matplotlib.colors import LogNorm, NoNorm
%matplotlib inline
def HeatMap(All_insitu_Percent, columns=None,colormap=b2m.sequential.YlOrRd_9.get_mpl_colormap(), m=1e-5):
    """Draw a log-scaled heat map of a table, rows ordered by descending row mean.

    Rows whose values sum to zero are left out of the plot. The table passed
    in by the caller is never modified.

    Parameters
    ----------
    All_insitu_Percent : pandas.DataFrame
        Numeric table to plot: one heat-map row per table row, one heat-map
        column per table column.
    columns : list-like of column labels, optional
        Subset (and order) of columns to plot. ``None`` or an empty
        collection plots every column.
    colormap : matplotlib colormap, optional
        Colour map handed to ``pcolor``.
    m : float, optional
        Lower bound (``vmin``) of the logarithmic colour scale; the upper
        bound is the largest value in the plotted table.

    Returns
    -------
    matplotlib.figure.Figure
        The figure, after it has been displayed with ``plt.show()``.
    """
    # `is not None` + len() instead of bare truthiness, which raises for
    # numpy arrays / pandas Index objects.
    if columns is not None and len(columns) > 0:
        table = All_insitu_Percent[list(columns)]
    else:
        table = All_insitu_Percent

    # Order rows by descending mean without writing a helper column onto the
    # caller's DataFrame. Negating before argsort gives descending order and
    # leaves NaN means at the end; mergesort keeps ties in their input order.
    row_means = table.mean(skipna=True, axis=1)
    descending = np.argsort(-row_means.values, kind='mergesort')
    table = table.iloc[descending]

    # Drop rows that are zero everywhere.
    table = table.loc[table.sum(axis=1) != 0]

    y_labels = list(table.index)
    x_labels = list(table.columns.values)

    fig3, ax3 = plt.subplots()
    # Float division: under Python 2 `len(...)/300` truncated to 0 for tables
    # with fewer than 300 rows. The 1-inch floor keeps short tables drawable.
    fig3.set_figheight(max(len(y_labels) / 300.0, 1.0))
    fig3.set_figwidth(len(x_labels))

    heatmap3 = ax3.pcolor(
        table.values,
        cmap=colormap,
        norm=LogNorm(vmin=m, vmax=table.max().max()),
    )

    # Centre one tick on every cell.
    ax3.set_xticks(np.arange(table.shape[1]) + 0.5, minor=False)
    ax3.set_yticks(np.arange(table.shape[0]) + 0.5, minor=False)

    # First row at the top, column labels above the map.
    ax3.invert_yaxis()
    ax3.xaxis.tick_top()
    ax3.margins(0, 0)

    ax3.set_xticklabels(x_labels, minor=False)
    ax3.set_yticklabels(y_labels, minor=False)

    plt.colorbar(heatmap3)
    plt.show()
    return fig3
In [77]:
# Plot from a copy so the shared count table can never be altered by plotting.
# The trailing semicolon stops the returned Figure from being rendered a
# second time as the cell output (plt.show() has already displayed it).
HeatMap(All_KO_Counts.copy());
In [76]:
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as dist
import matplotlib.pylab as pylab
# Hierarchically cluster the columns of the KO count table by correlation
# distance and draw the resulting dendrogram.
inDF=All_KO_Counts

# pdist compares rows, so transpose to compare columns. The result is the
# condensed (1-D) distance vector.
distMatrix=dist.pdist(inDF.T, 'Correlation')

# Square form kept for inspection / later cells; it must NOT be passed to
# linkage, which would treat each of its rows as an observation vector.
distSqMatrix = dist.squareform(distMatrix)

# linkage expects the condensed distance vector.
linkageMatrix=sch.linkage(distMatrix)

# `labels` must be given in the original observation order; dendrogram
# applies the leaf ordering itself. One call is enough to both draw the tree
# and obtain the leaf order.
dendro = sch.dendrogram(linkageMatrix, labels=list(inDF.columns))
leaves = dendro['leaves']
Genes associated with C00353:
Not annotated in the transcriptome:
In [108]:
def plotKOByTime(kid):
    """Plot the first five values of one row of ``All_KO_Counts`` as a line.

    Parameters
    ----------
    kid : str
        Row label to look up in the index of the module-level
        ``All_KO_Counts`` table.

    Returns
    -------
    None
        The line is added to the current matplotlib axes. When ``kid`` is not
        in the index, a message is printed and nothing is drawn.
    """
    # NOTE(review): presumably the first five columns are the time points the
    # function name refers to -- confirm against the input table.
    if kid in All_KO_Counts.index:
        y = All_KO_Counts.loc[kid].iloc[0:5]
        # Size the x axis from the data so a row with fewer than five values
        # does not raise a shape-mismatch error.
        plt.plot(np.arange(len(y)), y)
    else:
        # Parenthesised single-argument form prints identically under
        # Python 2 and is valid syntax under Python 3.
        print(kid + ' is not in the index')
In [109]:
# Overlay the profiles of the two KOs of interest on the same axes.
for ko_id in ('K02291', 'K17841'):
    plotKOByTime(ko_id)
In [96]:
# Show the full row of the count table for a single KO identifier.
kid = 'K02291'
All_KO_Counts.loc[kid, :]
Out[96]:
In [ ]: